
# clear environment
rm(list=ls())

# make sure working directory is set to folder within which this script is saved
getwd()

# packages for factor analyses
library(psych)
library(GPArotation)

#### Bring in Datasets ####

# Lucid 2018 Survey
rt_data <- read.csv('summer18_study/neg_bias_analysis_file_18.csv')

# Qualtrics Panels Survey
loss_data <- read.csv('toubia_study/DEEP Data_for R.csv')

# Lucid 2019 Survey
loss_data_19 <- read.csv("summer19_study/neg_bias_analysis_file_19.csv")

# Lucid 2020 Survey
loss_data_20 <- read.csv("summer20_study/neg_bias_analysis_file_20.csv")

#### Add Necessary Vars for Merging of Datasets ####

##Note: this is all built around accomodating rt_data, as its the largest

# Task Vars for subsetting
loss_data$task <- "toubia"
loss_data_19$task <- "summer19"
loss_data_20$task <- "summer20"

# RT mods to fit other datasets to enable merge
rt_data <- data.frame(age01 = rt_data$age, black = rt_data$black,
    hisp = rt_data$hisp,educ01 = rt_data$educ, unemp = rt_data$unemp,
    income01 = rt_data$income, engage01 = rt_data$engage, gay01 = rt_data$gay,
    abort01 = rt_data$abort, imm01 = rt_data$imm, affirm01 = rt_data$affirm,
    milit01 = rt_data$milit, wage01 = rt_data$wage, insure01 = rt_data$insure,
    imports01 = rt_data$import, ssec01 = rt_data$ssec, tax01 = rt_data$tax,
    open01 = rt_data$open, auth01 = rt_data$auth, nclose01 = rt_data$nfc,
    conserve01 = rt_data$conserve, deeploss = rt_data$neg_bias, right01 = rt_data$pol_id,
    neg_bias_std = rt_data$neg_bias_std, female = rt_data$female,
    task = rt_data$task,trad1 = rt_data$trad1, trad2 = rt_data$trad2,
    trad3 = rt_data$trad3, trad4 = rt_data$trad4,lim1 = rt_data$lim1,
    lim2 = rt_data$lim2, lim3 = rt_data$lim3, kwloss_3_std = rt_data$kwloss_3_std,
    dscore_correct_std = rt_data$dscore_correct_std, flk_pos_only = rt_data$flk_pos_only,
    flk_neg_only = rt_data$flk_neg_only)
rt_data$alpha <- rep(NA, nrow(rt_data))
rt_data$sigma <- rep(NA, nrow(rt_data))
rt_data$avoid_bev_std <- rep(NA, nrow(rt_data))
rt_data$val_wt_std <- rep(NA, nrow(rt_data))

## Toubia mods to fit rt_data

# Loss aversion estimates
loss_data$deeploss <- loss_data$lambda
loss_data$neg_bias_std <- scale(loss_data$deeploss)

# alternative cutoffs of neg bias
summary(loss_data$deeploss) # never gets above 3
loss_data$kwloss_3_std <- ifelse(loss_data$deeploss > 0.334,
                                 loss_data$deeploss, NA)
loss_data$kwloss_3_std <- (loss_data$kwloss_3_std -
                             mean(loss_data$kwloss_3_std, na.rm = T)) / sd(loss_data$kwloss_3_std, na.rm = T)

# Add additional columns for merge
loss_data$dscore_correct_std <- rep(NA, nrow(loss_data))
loss_data$flk_pos_only <- rep(NA, nrow(loss_data))
loss_data$flk_neg_only <- rep(NA, nrow(loss_data))
loss_data$avoid_bev_std <- rep(NA, nrow(loss_data))
loss_data$val_wt_std <- rep(NA, nrow(loss_data))

## Lucid 2019 mods to fit rt_data

# Add additional columns for merge
loss_data_19$dscore_correct_std <- rep(NA, nrow(loss_data_19))
loss_data_19$flk_pos_only <- rep(NA, nrow(loss_data_19))
loss_data_19$flk_neg_only <- rep(NA, nrow(loss_data_19))
loss_data_19$avoid_bev_std <- rep(NA, nrow(loss_data_19))
loss_data_19$val_wt_std <- rep(NA, nrow(loss_data_19))
summer19_data <- data.frame(age01 = loss_data_19$age, black = loss_data_19$black,
    hisp = loss_data_19$hisp,educ01 = loss_data_19$educ, unemp = loss_data_19$unemp,
    income01 = loss_data_19$income, engage01 = loss_data_19$engage,gay01 = loss_data_19$gay,
    abort01 = loss_data_19$abort, imm01 = loss_data_19$imm, affirm01 = loss_data_19$affirm,
    milit01 = loss_data_19$milit, wage01 = loss_data_19$wage, insure01 = loss_data_19$insure,
    imports01 = loss_data_19$import, ssec01 = loss_data_19$ssec, tax01 = loss_data_19$tax,
    open01 = loss_data_19$open,
    auth01 = loss_data_19$auth, nclose01 = loss_data_19$nfc,conserve01 = loss_data_19$conserve,
    deeploss = loss_data_19$lambda, right01 = loss_data_19$pol_id,
    neg_bias_std = loss_data_19$neg_bias_std, female = loss_data_19$female,
    task = loss_data_19$task,trad1 = loss_data_19$trad1, trad2 = loss_data_19$trad2,
    trad3 = loss_data_19$trad3, trad4 = loss_data_19$trad4,lim1 = loss_data_19$lim1,
    lim2 = loss_data_19$lim2, lim3 = loss_data_19$lim3,
    kwloss_3_std = loss_data_19$neg_bias_std_3,
    dscore_correct_std = loss_data_19$dscore_correct_std,
    flk_pos_only = loss_data_19$flk_pos_only, flk_neg_only = loss_data_19$flk_neg_only,
    alpha = loss_data_19$alpha, sigma = loss_data_19$sigma,
    avoid_bev_std = loss_data_19$avoid_bev_std, val_wt_std = loss_data_19$val_wt_std)

## Lucid 2020 mods to fit rt_data

# Add additional columns for merge
loss_data_20$kwloss_3_std <- rep(NA, nrow(loss_data_20))
loss_data_20$deeploss <- rep(NA, nrow(loss_data_20))
loss_data_20$dscore_correct_std <- rep(NA, nrow(loss_data_20))
loss_data_20$flk_pos_only <- rep(NA, nrow(loss_data_20))
loss_data_20$flk_neg_only <- rep(NA, nrow(loss_data_20))
loss_data_20$alpha <- rep(NA, nrow(loss_data_20))
loss_data_20$sigma <- rep(NA, nrow(loss_data_20))
summer20_data <- data.frame(age01 = loss_data_20$age, black = loss_data_20$black,
    hisp = loss_data_20$hisp,educ01 = loss_data_20$educ, unemp = loss_data_20$unemp,
    income01 = loss_data_20$income, engage01 = loss_data_20$engage,gay01 = loss_data_20$gay,
    abort01 = loss_data_20$abort, imm01 = loss_data_20$imm, affirm01 = loss_data_20$affirm,
    milit01 = loss_data_20$milit, wage01 = loss_data_20$wage, insure01 = loss_data_20$insure,
    imports01 = loss_data_20$import, ssec01 = loss_data_20$ssec, tax01 = loss_data_20$tax,
    open01 = loss_data_20$open, auth01 = loss_data_20$auth, nclose01 = loss_data_20$nfc,
    conserve01 = loss_data_20$conserve, deeploss = loss_data_20$deeploss,
    right01 = loss_data_20$pol_id, neg_bias_std = loss_data_20$neg_bias_std,
    female = loss_data_20$female, task = loss_data_20$task,
    trad1 = loss_data_20$trad1, trad2 = loss_data_20$trad2,
    trad3 = loss_data_20$trad3, trad4 = loss_data_20$trad4,lim1 = loss_data_20$lim1,
    lim2 = loss_data_20$lim2, lim3 = loss_data_20$lim3,  kwloss_3_std = loss_data_20$kwloss_3_std,
    dscore_correct_std = loss_data_20$dscore_correct_std, flk_pos_only = loss_data_20$flk_pos_only,
    flk_neg_only = loss_data_20$flk_neg_only,
    alpha = loss_data_20$alpha, sigma = loss_data_20$sigma,
    avoid_bev_std = loss_data_20$avoid_bev_std, val_wt_std = loss_data_20$val_wt_std)

#### Add Factor Analysis Vars

### Lucid 2018 (for each study)

# create subsetted datasets, one for each study
rt_data_loss <- subset(rt_data, rt_data$task == "loss")
rt_data_flank <- subset(rt_data, rt_data$task == "flank")
rt_data_lex <- subset(rt_data, rt_data$task == "lex")

## Loss study estimates

# generate correlation matrix of all issue vars
cormat <- cor(cbind(rt_data_loss[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

# run principal factors analysis for all issues, econ issues, and social issues, separately
ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 26%
ideo_efa_econ$Vaccounted # 31%
ideo_efa_soc$Vaccounted # 36%

# extract 1st dimension factor scores for all issues, econ issues, and social issues, separately
fa.scores_1 <- factor.scores(rt_data_loss[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(rt_data_loss[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(rt_data_loss[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

# put standardized factor scores into new variables in survey data
rt_data_loss$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
rt_data_loss$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
rt_data_loss$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))

## Flanker study

cormat <- cor(cbind(rt_data_flank[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 24%
ideo_efa_econ$Vaccounted # 29%
ideo_efa_soc$Vaccounted # 34%

fa.scores_1 <- factor.scores(rt_data_flank[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(rt_data_flank[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(rt_data_flank[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

rt_data_flank$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
rt_data_flank$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
rt_data_flank$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))

## Lexical decision task study

cormat <- cor(cbind(rt_data_lex[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 25%
ideo_efa_econ$Vaccounted # 30%
ideo_efa_soc$Vaccounted # 36%

fa.scores_1 <- factor.scores(rt_data_lex[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(rt_data_lex[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(rt_data_lex[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

rt_data_lex$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
rt_data_lex$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
rt_data_lex$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))

## Bring all studies back together
rt_data <- rbind(rt_data_loss, rt_data_flank)
rt_data <- rbind(rt_data, rt_data_lex)


### Toubia (2014 Qualtrics Panels Study)

cormat <- cor(cbind(loss_data[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 31%
ideo_efa_econ$Vaccounted # 42%
ideo_efa_soc$Vaccounted # 42%

fa.scores_1 <- factor.scores(loss_data[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(loss_data[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(loss_data[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

loss_data$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
loss_data$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
loss_data$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))


### Lucid 2019

cormat <- cor(cbind(summer19_data[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 24%
ideo_efa_econ$Vaccounted # 28%
ideo_efa_soc$Vaccounted # 34%

fa.scores_1 <- factor.scores(summer19_data[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(summer19_data[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(summer19_data[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

summer19_data$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
summer19_data$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
summer19_data$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))


### Lucid 2020

cormat <- cor(cbind(summer20_data[,c("trad1","trad2","trad3","trad4","lim1","lim2","lim3",
    "gay01","affirm01","imm01","abort01","insure01","ssec01","wage01",
    "tax01","milit01","imports01")]), use = "complete.obs")

ideo_efa_1 <- fa(cormat, nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_econ <- fa(cormat[c(5:7,12:15), c(5:7,12:15)], nfactors = 1, rotate = "promax", scores = "regression")
ideo_efa_soc <- fa(cormat[c(1:4,8:11), c(1:4,8:11)], nfactors = 1, rotate = "promax", scores = "regression")

# Proportion variance explained by 1st dimension (for Supplemental Materials B)
ideo_efa_1$Vaccounted # 23%
ideo_efa_econ$Vaccounted # 26%
ideo_efa_soc$Vaccounted # 31%

fa.scores_1 <- factor.scores(summer20_data[,c("trad1","trad2","trad3","trad4",
    "lim1","lim2","lim3","gay01","affirm01","imm01","abort01","insure01", "ssec01",
    "wage01","tax01","milit01","imports01")], ideo_efa_1, method="regression")
fa.scores_econ <- factor.scores(summer20_data[,c("lim1","lim2","lim3","insure01",
    "ssec01","wage01","tax01")], ideo_efa_econ, method="regression")
fa.scores_soc <- factor.scores(summer20_data[,c("trad1","trad2","trad3","trad4",
    "gay01","affirm01","imm01","abort01")], ideo_efa_soc, method="regression")

summer20_data$conservative01 <- (fa.scores_1$scores[ ,1] - min(fa.scores_1$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_1$scores[ ,1], na.rm = T) - min(fa.scores_1$scores[ ,1], na.rm = T))
summer20_data$social01 <- (fa.scores_soc$scores[ ,1] - min(fa.scores_soc$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_soc$scores[ ,1], na.rm = T) - min(fa.scores_soc$scores[ ,1], na.rm = T))
summer20_data$econ01 <- (fa.scores_econ$scores[ ,1] - min(fa.scores_econ$scores[ ,1], na.rm = T)) / 
  (max(fa.scores_econ$scores[ ,1], na.rm = T) - min(fa.scores_econ$scores[ ,1], na.rm = T))


#### Data Merge ####

full_data <- loss_data[names(rt_data)]
full_data <- rbind(full_data, rt_data) 
full_data <- rbind(full_data, summer19_data)
full_data <- rbind(full_data, summer20_data)

write.csv(full_data, "neg_bias_analysis_file.csv", row.names = F)
